{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true,
    "pycharm": {
     "name": "#%% md\n"
    }
   },
   "source": [
    "# 6 Pandas的函数应用"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "# Numpy ufunc 函数，randn跟的是维数\n",
    "df = pd.DataFrame(np.random.randn(5,4) - 1)\n",
    "print(df)\n",
    "\n",
    "print(np.abs(df)) #绝对值"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-02-25T08:19:09.188863Z",
     "start_time": "2025-02-25T08:19:09.182071Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2         3\n",
      "0 -1.277213  0.040946 -1.358049  0.630641\n",
      "1 -1.775542  0.711656 -0.554012  0.286910\n",
      "2 -0.232544 -1.551884 -0.213425 -0.304236\n",
      "3 -0.719713 -0.712548  0.559624 -1.753614\n",
      "4 -1.040634 -0.080608 -1.653512 -1.573596\n",
      "          0         1         2         3\n",
      "0  1.277213  0.040946  1.358049  0.630641\n",
      "1  1.775542  0.711656  0.554012  0.286910\n",
      "2  0.232544  1.551884  0.213425  0.304236\n",
      "3  0.719713  0.712548  0.559624  1.753614\n",
      "4  1.040634  0.080608  1.653512  1.573596\n"
     ]
    }
   ],
   "execution_count": 3
  },
  {
   "cell_type": "code",
   "source": [
    "#apply默认作用在列上,x是每一列,因为axis=0\n",
    "print(df.apply(lambda x : x.max()))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:20:40.439972Z",
     "start_time": "2025-01-07T12:20:40.433478Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.904370\n",
      "1   -0.349281\n",
      "2    0.171395\n",
      "3    0.265598\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 4
  },
  {
   "cell_type": "code",
   "source": [
    "#apply作用在行上\n",
    "print(df.apply(lambda x : x.max(), axis=1))"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:20:42.972015Z",
     "start_time": "2025-01-07T12:20:42.965975Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.265598\n",
      "1   -0.239601\n",
      "2    0.904370\n",
      "3   -0.019906\n",
      "4    0.171395\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 5
  },
  {
   "cell_type": "code",
   "source": [
    "# 使用map应用到每个数据\n",
    "print(df.map(lambda x : '%.2f' % x))\n",
    "df.dtypes"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:21:57.443738Z",
     "start_time": "2025-01-07T12:21:57.433495Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2      3\n",
      "0  -0.58  -1.04   0.11   0.27\n",
      "1  -2.58  -0.64  -0.24  -0.84\n",
      "2   0.90  -0.95  -0.29  -2.81\n",
      "3  -0.02  -1.40  -1.33  -0.40\n",
      "4  -2.12  -0.35   0.17  -1.43\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "0    float64\n",
       "1    float64\n",
       "2    float64\n",
       "3    float64\n",
       "dtype: object"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "source": [
    "type('%.2f' % 1.3456)"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:23:39.468831Z",
     "start_time": "2025-01-07T12:23:39.464591Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "str"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 7
  },
  {
   "cell_type": "markdown",
   "source": "## 6.4 索引排序（不重要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# Series\n",
    "print(np.random.randint(5, size=5))\n",
    "print('-'*50)\n",
    "s4 = pd.Series(range(10, 15), index = np.random.randint(5, size=5)) #索引随机生成\n",
    "print(s4)\n",
    "print('-'*50)\n",
    "# 索引排序,sort_index返回一个新的排好索引的series\n",
    "print(s4.sort_index())\n",
    "print(s4)\n",
    "# s4.loc[0:3]  loc索引值不唯一时直接报错\n",
    "print(s4.iloc[0:3])\n",
    "s4[0:3]  #默认用的位置索引"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:24:54.248431Z",
     "start_time": "2025-01-07T12:24:54.240774Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[0 3 2 3 4]\n",
      "--------------------------------------------------\n",
      "2    10\n",
      "2    11\n",
      "1    12\n",
      "0    13\n",
      "4    14\n",
      "dtype: int64\n",
      "--------------------------------------------------\n",
      "0    13\n",
      "1    12\n",
      "2    11\n",
      "2    10\n",
      "4    14\n",
      "dtype: int64\n",
      "2    10\n",
      "2    11\n",
      "1    12\n",
      "0    13\n",
      "4    14\n",
      "dtype: int64\n",
      "2    10\n",
      "2    11\n",
      "1    12\n",
      "dtype: int64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "2    10\n",
       "2    11\n",
       "1    12\n",
       "dtype: int64"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 14
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "# s4.loc[1:2] #loc索引值唯一时可以切片"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# DataFrame\n",
    "df4 = pd.DataFrame(np.random.randn(5, 5),\n",
    "                   index=np.random.randint(5, size=5),\n",
    "                   columns=np.random.randint(5, size=5))\n",
    "print(df4)\n",
    "#轴零是行索引排序\n",
    "df4_isort = df4.sort_index(axis=0, ascending=False)\n",
    "print(df4_isort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:28:22.878707Z",
     "start_time": "2025-01-07T12:28:22.865814Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1         2         1         2         2\n",
      "3 -1.854993 -0.209105 -0.585072 -0.226701 -0.883168\n",
      "2  0.699483 -1.432079 -0.694526 -1.397993  0.780055\n",
      "1  0.027325 -1.372122  0.019027 -1.974483 -0.928084\n",
      "0 -1.281569  1.739098  1.618979 -1.370483  0.828341\n",
      "0 -0.654865  2.201348  0.340671  0.914492  1.490055\n",
      "          1         2         1         2         2\n",
      "3 -1.854993 -0.209105 -0.585072 -0.226701 -0.883168\n",
      "2  0.699483 -1.432079 -0.694526 -1.397993  0.780055\n",
      "1  0.027325 -1.372122  0.019027 -1.974483 -0.928084\n",
      "0 -1.281569  1.739098  1.618979 -1.370483  0.828341\n",
      "0 -0.654865  2.201348  0.340671  0.914492  1.490055\n"
     ]
    }
   ],
   "execution_count": 17
  },
  {
   "cell_type": "code",
   "source": [
    "#轴1是列索引排序\n",
    "df4_isort = df4.sort_index(axis=1, ascending=True)\n",
    "print(df4_isort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T12:28:29.270510Z",
     "start_time": "2025-01-07T12:28:29.229716Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1         1         2         2         2\n",
      "3 -1.854993 -0.585072 -0.209105 -0.226701 -0.883168\n",
      "2  0.699483 -0.694526 -1.432079 -1.397993  0.780055\n",
      "1  0.027325  0.019027 -1.372122 -1.974483 -0.928084\n",
      "0 -1.281569  1.618979  1.739098 -1.370483  0.828341\n",
      "0 -0.654865  0.340671  2.201348  0.914492  1.490055\n"
     ]
    }
   ],
   "execution_count": 18
  },
  {
   "cell_type": "markdown",
   "source": "# 6.5 按值排序（机器学习，深度学习不重要，数据分析才需要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "# 按值排序,by后是column的值\n",
    "import random\n",
    "l=[random.randint(0,100) for i in range(24)] #生成24个随机数\n",
    "df4 = pd.DataFrame(np.array(l).reshape(6,4)) #生成6行4列的dataframe\n",
    "print(df4) #查看数据,ndarray\n",
    "print('-'*50)\n",
    "#按轴零排序，by后是列名,交换的是行\n",
    "df4_vsort = df4.sort_values(by=3,axis=0, ascending=False) #寻找的是columns里的3,重要，ascending=False表示的是降序\n",
    "print(df4_vsort)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:32:02.172776Z",
     "start_time": "2025-01-07T12:32:02.166816Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    0   1   2   3\n",
      "0  93  53  72  88\n",
      "1   8  13  23  38\n",
      "2  13  42  74  50\n",
      "3  25  59  31  87\n",
      "4  84  90  38  73\n",
      "5  20  17  59  18\n",
      "--------------------------------------------------\n",
      "    0   1   2   3\n",
      "0  93  53  72  88\n",
      "3  25  59  31  87\n",
      "4  84  90  38  73\n",
      "2  13  42  74  50\n",
      "1   8  13  23  38\n",
      "5  20  17  59  18\n"
     ]
    }
   ],
   "execution_count": 31
  },
  {
   "cell_type": "code",
   "source": [
    "#按轴1排序，by后行索引名，交换的是列\n",
    "df4_vsort = df4.sort_values(by=3,axis=1, ascending=False) #寻找的是index里的3\n",
    "print(df4_vsort)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T12:32:06.011955Z",
     "start_time": "2025-01-07T12:32:06.006458Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "    3   1   2   0\n",
      "0  88  53  72  93\n",
      "1  38  13  23   8\n",
      "2  50  42  74  13\n",
      "3  87  59  31  25\n",
      "4  73  90  38  84\n",
      "5  18  17  59  20\n"
     ]
    }
   ],
   "execution_count": 32
  },
  {
   "cell_type": "markdown",
   "source": "# 6.6 处理缺失数据（重要）",
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "df_data = pd.DataFrame([np.random.randn(3), [1., 2., np.nan],[np.nan, 4., np.nan], [1., 2., 3.]])\n",
    "print(df_data.head())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:34:22.088436Z",
     "start_time": "2025-01-07T12:34:22.081650Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0 -0.417012  1.521518 -0.756087\n",
      "1  1.000000  2.000000       NaN\n",
      "2       NaN  4.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 33
  },
  {
   "cell_type": "code",
   "source": [
    "df_data.iloc[2,0]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T12:36:34.591241Z",
     "start_time": "2025-01-07T12:36:34.584592Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "np.float64(nan)"
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 34
  },
  {
   "cell_type": "code",
   "source": [
    "#isnull来判断是否有空的数据\n",
    "print(df_data.isnull())"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:37:20.602658Z",
     "start_time": "2025-01-07T12:37:20.597098Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       0      1      2\n",
      "0  False  False  False\n",
      "1  False  False   True\n",
      "2   True  False   True\n",
      "3  False  False  False\n"
     ]
    }
   ],
   "execution_count": 35
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T12:37:36.594108Z",
     "start_time": "2025-01-07T12:37:36.590146Z"
    }
   },
   "cell_type": "code",
   "source": [
    "#帮我计算df_data缺失率\n",
    "print(df_data.isnull().sum()/len(df_data))"
   ],
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0    0.25\n",
      "1    0.00\n",
      "2    0.50\n",
      "dtype: float64\n"
     ]
    }
   ],
   "execution_count": 37
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 删除缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "cell_type": "code",
   "source": [
    "#默认一个样本，任何一个特征缺失，就删除\n",
    "#inplace True是修改的是原有的df\n",
    "#subset=[0]是指按第0列来删除,第0列有空值就删除对应的行\n",
    "print(df_data.dropna(subset=[0]))\n",
    "# df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:38:35.141263Z",
     "start_time": "2025-01-07T12:38:35.135218Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          0         1         2\n",
      "0 -0.417012  1.521518 -0.756087\n",
      "1  1.000000  2.000000       NaN\n",
      "3  1.000000  2.000000  3.000000\n"
     ]
    }
   ],
   "execution_count": 42
  },
  {
   "cell_type": "code",
   "source": "df_data   #dropna返回一个新的列表",
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T12:38:47.665720Z",
     "start_time": "2025-01-07T12:38:47.657029Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0 -0.417012  1.521518 -0.756087\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.417012</td>\n",
       "      <td>1.521518</td>\n",
       "      <td>-0.756087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 43
  },
  {
   "cell_type": "code",
   "source": [
    "#用的不多，用在某个特征缺失太多时，才会进行删除\n",
    "print(df_data.dropna(axis=1))  #某列由nan就删除该列"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:39:16.109291Z",
     "start_time": "2025-01-07T12:39:16.067356Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "          1\n",
      "0  1.521518\n",
      "1  2.000000\n",
      "2  4.000000\n",
      "3  2.000000\n"
     ]
    }
   ],
   "execution_count": 44
  },
  {
   "cell_type": "code",
   "source": [
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-01-07T07:32:04.356548Z",
     "start_time": "2025-01-07T07:32:04.351917Z"
    }
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0 -1.264229 -0.514817  1.420473\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-1.264229</td>\n",
       "      <td>-0.514817</td>\n",
       "      <td>1.420473</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 21
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 填充缺失数据"
   ],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": "#均值，中位数，众数填充"
  },
  {
   "cell_type": "code",
   "source": [
    "#给零列的空值填为-100，按特征（按列）去填充 即就是填充第0列的所有行\n",
    "print(df_data.iloc[:,0].fillna(-100.))\n",
    "df_data"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:40:46.487252Z",
     "start_time": "2025-01-07T12:40:46.479227Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0     -0.417012\n",
      "1      1.000000\n",
      "2   -100.000000\n",
      "3      1.000000\n",
      "Name: 0, dtype: float64\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "          0         1         2\n",
       "0 -0.417012  1.521518 -0.756087\n",
       "1  1.000000  2.000000       NaN\n",
       "2       NaN  4.000000       NaN\n",
       "3  1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.417012</td>\n",
       "      <td>1.521518</td>\n",
       "      <td>-0.756087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>NaN</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 45,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 45
  },
  {
   "cell_type": "code",
   "source": [
    "#依次拿到每一列\n",
    "for i in df_data.columns:\n",
    "    print(df_data.loc[:,i])"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    },
    "ExecuteTime": {
     "end_time": "2025-01-07T12:41:51.513823Z",
     "start_time": "2025-01-07T12:41:51.508670Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0   -0.417012\n",
      "1    1.000000\n",
      "2         NaN\n",
      "3    1.000000\n",
      "Name: 0, dtype: float64\n",
      "0    1.521518\n",
      "1    2.000000\n",
      "2    4.000000\n",
      "3    2.000000\n",
      "Name: 1, dtype: float64\n",
      "0   -0.756087\n",
      "1         NaN\n",
      "2         NaN\n",
      "3    3.000000\n",
      "Name: 2, dtype: float64\n"
     ]
    }
   ],
   "execution_count": 46
  },
  {
   "cell_type": "markdown",
   "source": [],
   "metadata": {
    "collapsed": false
   }
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T12:48:18.546451Z",
     "start_time": "2025-01-07T12:48:18.540780Z"
    }
   },
   "cell_type": "code",
   "source": [
    "df_data.iloc[:,0].fillna(-100.,inplace=True) #inplace=True后面会被删除\n",
    "# df_data.iloc[:,0] = df_data.iloc[:,0].fillna(-100.)  #官方推荐"
   ],
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\54439\\AppData\\Local\\Temp\\ipykernel_19752\\2218614896.py:1: FutureWarning: A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.\n",
      "The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.\n",
      "\n",
      "For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.\n",
      "\n",
      "\n",
      "  df_data.iloc[:,0].fillna(-100.,inplace=True) #inplace=True后面会被删除\n"
     ]
    }
   ],
   "execution_count": 55
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T12:48:22.065714Z",
     "start_time": "2025-01-07T12:48:22.061539Z"
    }
   },
   "cell_type": "code",
   "source": "df_data.iloc[:,2]=df_data.iloc[:,2].fillna(df_data.iloc[:,2].mean()) #用均值填充空值，此为直接赋值",
   "outputs": [],
   "execution_count": 56
  },
  {
   "metadata": {
    "ExecuteTime": {
     "end_time": "2025-01-07T12:51:14.328641Z",
     "start_time": "2025-01-07T12:51:14.321715Z"
    }
   },
   "cell_type": "code",
   "source": "df_data",
   "outputs": [
    {
     "data": {
      "text/plain": [
       "            0         1         2\n",
       "0   -0.417012  1.521518 -0.756087\n",
       "1    1.000000  2.000000  1.121957\n",
       "2 -100.000000  4.000000  1.121957\n",
       "3    1.000000  2.000000  3.000000"
      ],
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>-0.417012</td>\n",
       "      <td>1.521518</td>\n",
       "      <td>-0.756087</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>1.121957</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>-100.000000</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>1.121957</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1.000000</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ]
     },
     "execution_count": 58,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "execution_count": 58
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
